# -*- coding: utf-8 -*-

import os
import re

def _subdomain_pattern(host):
    """Compile a case-insensitive regex matching one-label subdomains of *host*.

    A leading ``www.`` is dropped so that siblings of ``www`` (for example
    ``mail.example.com`` for ``www.example.com``) are matched as well.
    """
    domain = host[4:] if host.startswith('www.') else host
    # Escape the domain so that its dots match literal dots only; otherwise
    # "example.com" would also match text such as "examplexcom".
    return re.compile(r'[\w]+\.' + re.escape(domain), re.I)


def _scan_site_file(site_path, pattern):
    """Return the set of lower-cased matches of *pattern* found in *site_path*."""
    found = set()
    with open(site_path) as site_file:
        for text in site_file:
            for match in pattern.findall(text):
                found.add(match.lower())
    return found


def main(list_path='Top200list.txt', site_dir='site',
         out_path='subDomain.txt'):
    """Write the subdomains found in each host's saved page to *out_path*.

    *list_path* holds one host name per line.  For every host the file
    ``<site_dir>/<host>.htm`` is scanned (when it exists) for names of the
    form ``<label>.<domain>``.  Each host produces one section in the output:
    two newlines followed by the sorted names, one per line.  The host itself
    is always listed in its own section.

    Blank lines in *list_path* are skipped.  A message is printed for every
    host whose page file is missing.
    """
    with open(list_path) as host_file:
        with open(out_path, 'w') as out_file:
            for raw_line in host_file:
                # strip() removes the line terminator only when it is there,
                # so a final line without a trailing newline stays intact.
                host = raw_line.strip()
                if not host:
                    continue

                names = set()
                names.add(host)

                # os.path.join keeps the lookup portable instead of relying
                # on a hard-coded Windows path separator.
                site_path = os.path.join(site_dir, host + '.htm')
                if os.path.isfile(site_path):
                    pattern = _subdomain_pattern(host)
                    print(pattern.pattern)
                    names.update(_scan_site_file(site_path, pattern))
                else:
                    print(site_path + " not found!")

                out_file.write('\n\n')
                for name in sorted(names):
                    out_file.write(name + '\n')


if __name__ == '__main__':
    main()






    
